Our goal here is to go over designing guides for a tiling screen on positive genes. We’ll design CRISPR guide libraries for the top genes in the screen. First we’ll go over how to do one gene and then extend the method to other genes.

Several studies have shown improved interference and activation when guides are designed against FANTOM5-annotated TSSs, so we will use those annotations here as well. TSS information was downloaded from http://biomart.gsc.riken.jp/ and lifted over to mm10 using the UCSC liftover tool (https://genome.ucsc.edu/cgi-bin/hgLiftOver).

# Switch to the sgRNA design directory before loading any inputs.
setwd("~/sgRNA/sgRNAdesign/")

# Gene symbols selected for the tiling screen, read as character tokens.
genes <- scan(file = "~/sgRNA/Yanxia/genes4tilingscreen.txt", what = character())

# TSS annotation in BED layout; read.table() names the columns V1, V2, ...
# because the file is read without a header.
FANTOM5mm10TSSannotation <- read.table("~/sgRNA/sgRNAdesign/mm10TSSliftover.bed")

# Preview the first rows of the annotation table.
head(FANTOM5mm10TSSannotation)
##      V1        V2        V3                           V4  V5 V6        V7
## 1 chr10 100589202 100589203      p2@4930430F08Rik,0.3007  55  - 100589202
## 2 chr10 100589205 100589269      p1@4930430F08Rik,0.3053   0  - 100589205
## 3 chr10 100592365 100592382      p1@1700017N19Rik,0.1939  -3  + 100592365
## 4 chr10 100592386 100592406      p2@1700017N19Rik,0.1988   0  + 100592386
## 5 chr10  10343097  10343101 p1@ENSMUST00000118589,0.0800 -13  +  10343097
## 6 chr10 101681605 101681621            p12@Mgat4c,0.3624 -19  + 101681605
##          V8          V9
## 1 100589203  60,179,113
## 2 100589269  60,179,113
## 3 100592382 211,211,211
## 4 100592406 211,211,211
## 5  10343101 211,211,211
## 6 101681621  60,179,113
# Collect, for every gene of interest, the annotation rows whose label
# (column 4) contains "p1@<gene>," or "p2@<gene>,".
#
# Row order matches a gene-by-gene scan: all p1 hits for a gene, then all p2
# hits for that gene, then the next gene.
#
# fixed = TRUE makes the gene symbol match literally, so a symbol containing a
# regex metacharacter (e.g. ".") cannot match unrelated labels.
# Row indices are gathered first and the table is subset once, instead of
# growing a data frame with rbind() inside the loop. With no genes or no hits
# the result is a zero-row data frame with the annotation's columns.
tss.labels <- as.character(FANTOM5mm10TSSannotation[, 4])
tss.rows <- unlist(lapply(genes, function(gene) {
  c(
    grep(paste0("p1@", gene, ","), tss.labels, fixed = TRUE),
    grep(paste0("p2@", gene, ","), tss.labels, fixed = TRUE)
  )
}))
Top2TSSs <- FANTOM5mm10TSSannotation[tss.rows, ]
Top2TSSs
##           V1        V2        V3                         V4   V5 V6
## 23490  chr15  54745902  54745916              p1@Nov,0.2919  -11  +
## 23489  chr15  54745870  54745881              p2@Nov,0.2772  -46  +
## 15554  chr12  86421628  86421662            p1@Esrrb,0.5901    0  +
## 15569  chr12  86470131  86470150            p2@Esrrb,0.3256   14  +
## 26717  chr16  23988625  23988641             p1@Bcl6,0.3561  -13  -
## 26716  chr16  23988600  23988607             p2@Bcl6,0.3050    4  -
## 138196  chr7  30636514  30636529             p1@Etv2,0.1369 -225  -
## 11368  chr11  84525514  84525542             p1@Lhx1,0.2494    0  -
## 101016 chr11  84525894  84525930             p2@Lhx1,0.2707 -360  -
## 2093   chr10  57486354  57486414             p1@Hsf2,0.4737    0  +
## 2094   chr10  57486418  57486454             p2@Hsf2,0.5200   33  +
## 54772   chr3  34649987  34650005             p1@Sox2,0.4826    0  +
## 54773   chr3  34650024  34650050             p2@Sox2,0.4802   -4  +
## 89595   chr9  55541190  55541209             p1@Isl2,0.4408    0  +
## 89593   chr9  55538635  55538676             p2@Isl2,0.1039    0  +
## 3715   chr10  81559447  81559498              p1@Aes,0.4399    0  +
## 3716   chr10  81559524  81559583              p2@Aes,0.4529    0  +
## 1511   chr10  30842765  30842791             p1@Hey2,0.5586    0  -
## 1513   chr10  30842811  30842831             p2@Hey2,0.5308  -10  -
## 72993   chr6  43309549  43309552            p1@Nobox,0.2041    1  -
## 83229   chr8  12395603  12395621             p1@Sox1,0.2398   84  +
## 140181  chr8  12395874  12395885             p2@Sox1,0.1326  355  +
## 113450 chr18  11052644  11052661            p1@Gata6,0.4626  134  +
## 33421  chr18  11052487  11052510            p2@Gata6,0.5117    0  +
## 29910  chr17  27728937  27728956            p1@Spdef,0.1132    0  -
## 111472 chr17  27729074  27729086            p2@Spdef,0.1740 -123  -
## 22639  chr15 102954474 102954495           p1@Hoxc11,0.4722  -30  +
## 22640  chr15 102954510 102954521           p2@Hoxc11,0.4106   -4  +
## 31078  chr17  35506052  35506069           p1@Pou5f1,0.3172   13  +
## 31077  chr17  35506028  35506049           p2@Pou5f1,0.3152    0  +
## 65220   chr5 123394782 123394826            p1@Mlxip,0.3923    0  +
## 65219   chr5 123394770 123394781            p2@Mlxip,0.3918  -16  +
## 73754   chr6  64729118  64729132            p1@Atoh1,0.3346  -13  +
## 73756   chr6  64729241  64729254            p2@Atoh1,0.3880   95  +
## 31893  chr17  47737036  47737107             p1@Tfeb,0.3986    0  +
## 112581 chr17  47737174  47737207             p2@Tfeb,0.3822  137  +
## 94378   chrX   7967817   7967875            p1@Gata1,0.5548   -2  -
## 94377   chrX   7967802   7967812            p2@Gata1,0.4441    2  -
## 40561   chr1 167689552 167689563            p1@Lmx1a,0.5086    0  +
## 40562   chr1 167689565 167689577            p2@Lmx1a,0.5453    7  +
## 22284  chr14  99298652  99298715             p1@Klf5,0.3062    0  +
## 107327 chr14  99298945  99298989             p2@Klf5,0.1920  254  +
## 21407  chr14  63245219  63245265            p1@Gata4,0.3098    0  -
## 21412  chr14  63271654  63271679            p2@Gata4,0.5574   12  -
## 85405   chr8  72319053  72319071             p1@Klf2,0.2649    0  +
## 141319  chr8  72318893  72318915             p2@Klf2,0.2366 -117  +
## 38846   chr1 118627943 118627951          p1@Tfcp2l1,0.3550    0  +
## 38847   chr1 118627986 118628014          p2@Tfcp2l1,0.4221   41  +
## 62572   chr4  55532465  55532485             p1@Klf4,0.3020    0  -
## 129031  chr4  55532179  55532238             p2@Klf4,0.1941  214  -
## 70482   chr6 122707592 122707608 p1@Nanog,p1@Nanogpd,0.2468    0  +
## 70483   chr6 122707646 122707655 p2@Nanog,p2@Nanogpd,0.2636   53  +
##               V7        V8          V9
## 23490   54745902  54745916  60,179,113
## 23489   54745870  54745881  60,179,113
## 15554   86421628  86421662  60,179,113
## 15569   86470131  86470150  60,179,113
## 26717   23988625  23988641  60,179,113
## 26716   23988600  23988607  60,179,113
## 138196  30636514  30636529 211,211,211
## 11368   84525514  84525542  60,179,113
## 101016  84525894  84525930  60,179,113
## 2093    57486354  57486414  60,179,113
## 2094    57486418  57486454  60,179,113
## 54772   34649987  34650005  60,179,113
## 54773   34650024  34650050  60,179,113
## 89595   55541190  55541209  60,179,113
## 89593   55538635  55538676 211,211,211
## 3715    81559447  81559498  60,179,113
## 3716    81559524  81559583  60,179,113
## 1511    30842765  30842791  60,179,113
## 1513    30842811  30842831  60,179,113
## 72993   43309549  43309552 211,211,211
## 83229   12395603  12395621  60,179,113
## 140181  12395874  12395885 211,211,211
## 113450  11052644  11052661  60,179,113
## 33421   11052487  11052510  60,179,113
## 29910   27728937  27728956 211,211,211
## 111472  27729074  27729086 211,211,211
## 22639  102954474 102954495  60,179,113
## 22640  102954510 102954521  60,179,113
## 31078   35506052  35506069  60,179,113
## 31077   35506028  35506049  60,179,113
## 65220  123394782 123394826  60,179,113
## 65219  123394770 123394781  60,179,113
## 73754   64729118  64729132  60,179,113
## 73756   64729241  64729254  60,179,113
## 31893   47737036  47737107  60,179,113
## 112581  47737174  47737207  60,179,113
## 94378    7967817   7967875  60,179,113
## 94377    7967802   7967812  60,179,113
## 40561  167689552 167689563  60,179,113
## 40562  167689565 167689577  60,179,113
## 22284   99298652  99298715  60,179,113
## 107327  99298945  99298989 211,211,211
## 21407   63245219  63245265  60,179,113
## 21412   63271654  63271679  60,179,113
## 85405   72319053  72319071  60,179,113
## 141319  72318893  72318915  60,179,113
## 38846  118627943 118627951  60,179,113
## 38847  118627986 118628014  60,179,113
## 62572   55532465  55532485  60,179,113
## 129031  55532179  55532238 211,211,211
## 70482  122707592 122707608  60,179,113
## 70483  122707646 122707655  60,179,113
# Reduce the selected annotation rows to one record per TSS:
#   chr    - chromosome (column 1)
#   TSS    - midpoint of the annotated interval (mean of columns 2 and 3)
#   strand - 1 for "+" entries in column 6, -1 for anything else
#   gene   - label (column 4) with everything from the first comma onwards
#            removed, then everything up to and including the "@" removed
Top2TSSs <- data.frame(
  chr = Top2TSSs[, 1],
  TSS = rowMeans(Top2TSSs[, c(2, 3)]),
  strand = vapply(
    Top2TSSs[, 6],
    function(s) if (s == "+") 1 else -1,
    numeric(1)
  ),
  gene = sub(".*@", "", gsub("\\,.*", "", Top2TSSs[, 4]))
)
Top2TSSs
##          chr       TSS strand    gene
## 23490  chr15  54745909      1     Nov
## 23489  chr15  54745876      1     Nov
## 15554  chr12  86421645      1   Esrrb
## 15569  chr12  86470140      1   Esrrb
## 26717  chr16  23988633     -1    Bcl6
## 26716  chr16  23988604     -1    Bcl6
## 138196  chr7  30636522     -1    Etv2
## 11368  chr11  84525528     -1    Lhx1
## 101016 chr11  84525912     -1    Lhx1
## 2093   chr10  57486384      1    Hsf2
## 2094   chr10  57486436      1    Hsf2
## 54772   chr3  34649996      1    Sox2
## 54773   chr3  34650037      1    Sox2
## 89595   chr9  55541200      1    Isl2
## 89593   chr9  55538656      1    Isl2
## 3715   chr10  81559472      1     Aes
## 3716   chr10  81559554      1     Aes
## 1511   chr10  30842778     -1    Hey2
## 1513   chr10  30842821     -1    Hey2
## 72993   chr6  43309550     -1   Nobox
## 83229   chr8  12395612      1    Sox1
## 140181  chr8  12395880      1    Sox1
## 113450 chr18  11052652      1   Gata6
## 33421  chr18  11052498      1   Gata6
## 29910  chr17  27728946     -1   Spdef
## 111472 chr17  27729080     -1   Spdef
## 22639  chr15 102954484      1  Hoxc11
## 22640  chr15 102954516      1  Hoxc11
## 31078  chr17  35506060      1  Pou5f1
## 31077  chr17  35506038      1  Pou5f1
## 65220   chr5 123394804      1   Mlxip
## 65219   chr5 123394776      1   Mlxip
## 73754   chr6  64729125      1   Atoh1
## 73756   chr6  64729248      1   Atoh1
## 31893  chr17  47737072      1    Tfeb
## 112581 chr17  47737190      1    Tfeb
## 94378   chrX   7967846     -1   Gata1
## 94377   chrX   7967807     -1   Gata1
## 40561   chr1 167689558      1   Lmx1a
## 40562   chr1 167689571      1   Lmx1a
## 22284  chr14  99298684      1    Klf5
## 107327 chr14  99298967      1    Klf5
## 21407  chr14  63245242     -1   Gata4
## 21412  chr14  63271666     -1   Gata4
## 85405   chr8  72319062      1    Klf2
## 141319  chr8  72318904      1    Klf2
## 38846   chr1 118627947      1 Tfcp2l1
## 38847   chr1 118628000      1 Tfcp2l1
## 62572   chr4  55532475     -1    Klf4
## 129031  chr4  55532208     -1    Klf4
## 70482   chr6 122707600      1   Nanog
## 70483   chr6 122707650      1   Nanog
# Build one target window per gene: l bp extending from the gene's outermost
# selected TSS, on the upstream side relative to the gene's strand.
#
# Keep the first row of each gene. A logical mask is used instead of
# x[-which(duplicated(...))]: when no gene is duplicated, which() returns
# integer(0) and negative indexing with it returns an EMPTY vector, which
# would silently drop every gene.
first.per.gene <- !duplicated(Top2TSSs$gene)
chr <- Top2TSSs$chr[first.per.gene]
strand <- Top2TSSs$strand[first.per.gene]
genes <- Top2TSSs$gene[first.per.gene]

# Window length in bp.
l <- 10000

start <- rep(0, times = length(genes))
end <- rep(0, times = length(genes))
# seq_along() (not 1:length()) so that an empty gene list runs zero iterations.
for (i in seq_along(genes)) {
  gene.tss <- Top2TSSs$TSS[which(Top2TSSs$gene == genes[i])]
  if (strand[i] == 1) {
    # "+" strand: anchor at the smallest TSS coordinate and extend to lower
    # coordinates.
    start[i] <- min(gene.tss)
    end[i] <- start[i] - l
  } else {
    # "-" strand: anchor at the largest TSS coordinate and extend to higher
    # coordinates.
    start[i] <- max(gene.tss)
    end[i] <- start[i] + l
  }
}

# Report each window with start <= end regardless of strand; pmin()/pmax()
# order the two coordinates row by row.
target.regions <- data.frame(
  chr = chr,
  strand = ifelse(strand == 1, "+", "-"),
  gene = genes,
  start = pmin(start, end),
  end = pmax(start, end)
)
target.regions
##      chr strand    gene     start       end
## 1  chr15      +     Nov  54735876  54745876
## 2  chr12      +   Esrrb  86411645  86421645
## 3  chr16      -    Bcl6  23988633  23998633
## 4   chr7      -    Etv2  30636522  30646522
## 5  chr11      -    Lhx1  84525912  84535912
## 6  chr10      +    Hsf2  57476384  57486384
## 7   chr3      +    Sox2  34639996  34649996
## 8   chr9      +    Isl2  55528656  55538656
## 9  chr10      +     Aes  81549472  81559472
## 10 chr10      -    Hey2  30842821  30852821
## 11  chr6      -   Nobox  43309550  43319550
## 12  chr8      +    Sox1  12385612  12395612
## 13 chr18      +   Gata6  11042498  11052498
## 14 chr17      -   Spdef  27729080  27739080
## 15 chr15      +  Hoxc11 102944484 102954484
## 16 chr17      +  Pou5f1  35496038  35506038
## 17  chr5      +   Mlxip 123384776 123394776
## 18  chr6      +   Atoh1  64719125  64729125
## 19 chr17      +    Tfeb  47727072  47737072
## 20  chrX      -   Gata1   7967846   7977846
## 21  chr1      +   Lmx1a 167679558 167689558
## 22 chr14      +    Klf5  99288684  99298684
## 23 chr14      -   Gata4  63271666  63281666
## 24  chr8      +    Klf2  72308904  72318904
## 25  chr1      + Tfcp2l1 118617947 118627947
## 26  chr4      -    Klf4  55532475  55542475
## 27  chr6      +   Nanog 122697600 122707600

We’ll take a look at these in the genome browser.

Nov

Nov promoter

Nov promoter

This promoter looks very safe to target.

Esrrb

Esrrb promoter

Esrrb promoter

Safe to target.

Bcl6

Bcl6 promoter

Bcl6 promoter

Safe to target.

Etv2

Etv2 promoter

Etv2 promoter

This one is partially problematic. Rbm42 and Haus5 run on the opposite strand, so we can probably target up to the TTS of these genes (30650317 and 30664994, respectively).

Lhx1

Lhx1 promoter

Lhx1 promoter

There is a lncRNA on the opposite strand (Lhx1os with TSS 84525660). If we target further upstream of this, we should be fine. We should also check the distribution of the guides to ensure that the effect is not due to Lhx1os.

Hsf2

Hsf2 promoter

Hsf2 promoter

Again, we have the same issue. We have to target further upstream to the TSS of 4930467K11Rik (57,486,351).

Sox2

Sox2 promoter

Sox2 promoter

Again, a lncRNA is nearby, but this time on the same strand as the gene. We can target downstream of the TSS of Sox2ot (34,638,174) to ensure we are targeting Sox2.

Isl2

Isl2 promoter

Isl2 promoter

There is a gene in the opposite direction of Isl2, Etfa (TSS: 55,512,243). We will have to target about 5kb in the opposite direction to ensure we’re not targeting Etfa.

Aes

Aes promoter

Aes promoter

Again, there’s a gene in the opposite direction, Gna11 (TSS: 81,545,190). We’ll have to do the same as above.

Hey2

Hey2 promoter

Hey2 promoter

This gene looks safe to target.

Nobox

Nobox promoter

Nobox promoter

This gene looks safe to target.

Sox1

Sox1 promoter

Sox1 promoter

There is a predicted gene with a TSS upstream of the TSS, but I’m not sure how much we have to do about this.

Gata6

Gata6 promoter

Gata6 promoter

There are lncRNAs in the opposite direction that we may have to worry about. 1010001N08Rik with TSS at 11,052,567.

Spdef

Spdef promoter

Spdef promoter

There is a gene nearby, but it runs in the same direction as Spdef, so I don’t think there’s much we have to worry about as long as we don’t target in the gene (D17Wsu92e with TSS 27,751,232).

Hoxc11

Hoxc11 promoter

Hoxc11 promoter

There is a whole mess here. If we target Hoxc11, we might also target Hotair. I don’t know if we should even include this gene.

Pou5f1

Pou5f1 promoter

Pou5f1 promoter

The only gene nearby is Tcf19, but this runs in the opposite direction so I don’t think we have to worry about this.

Mlxip

Mlxip promoter

Mlxip promoter

There is a gene running in the opposite direction, Bcl7a, with TTS 123,374,992.

Atoh1

Atoh1 promoter

Atoh1 promoter

This gene looks fine.

Tfeb

Tfeb promoter

Tfeb promoter

This gene looks problematic. The promoter for Pgc (TSS: 47,734,482) essentially overlaps with Tfeb. This gene will have to be verified.

Gata1

Gata1 promoter

Gata1 promoter

This gene looks good.

Lmx1a

Lmx1a promoter

Lmx1a promoter

This gene looks good.

Klf5

Klf5 promoter

Klf5 promoter

This gene looks good.

Gata4

Gata4 promoter

Gata4 promoter

This gene looks good.

Klf2

Klf2 promoter

Klf2 promoter

There is a pseudogene in the opposite direction that we may have to worry about. Gm10282 has a TSS at 72,305,260, so we will have to limit ourselves to about 5kb away from this.

Tfcp2l1

Tfcp2l1 promoter

Tfcp2l1 promoter

Again a problem here. We will have to limit ourselves to the TSS of Clasp1 (118,612,678).

Klf4

Klf4 promoter

Klf4 promoter

There is a predicted gene about 20Kb from the TSS of Klf4. We should be relatively safe, but to be sure we can target up to 5kb from the TSS of Gm12511 (55,563,265).

Nanog

Nanog promoter

Nanog promoter

This gene is safe.

Designing guides.

library(Biostrings)
library(GenomicRanges)
library(BSgenome.Mmusculus.UCSC.mm10)

# One genomic range per row of target.regions.
# NOTE(review): the original referenced `target.region`, which is not defined
# anywhere in the visible document; the table built above is `target.regions`.
# Confirm no hidden chunk was meant to supply a single-gene `target.region`.
# pmin()/pmax() order the coordinates row by row. Scalar min()/max() would
# collapse all rows into a single interval that is then recycled across every
# chromosome.
target.region.gr <- GRanges(
  seqnames = target.regions$chr,
  ranges = IRanges(
    start = pmin(target.regions$start, target.regions$end),
    end = pmax(target.regions$start, target.regions$end)
  ),
  strand = target.regions$strand
)

# Extract the sequence of every target range from the mm10 genome.
target.seq <- getSeq(BSgenome.Mmusculus.UCSC.mm10, target.region.gr)
target.seq

# Exact "GG" occurrences within each extracted sequence
# (presumably the GG of an NGG PAM -- confirm).
pam <- "GG"
pam.matches <- vmatchPattern(pam, target.seq, max.mismatch = 0)
length(pam.matches[[1]])
head(pam.matches)

# Exact "CC" occurrences, i.e. the reverse complement of the pattern above.
pam.reverse <- "CC"
pam.reverse.matches <- vmatchPattern(pam.reverse, target.seq, max.mismatch = 0)
length(pam.reverse.matches[[1]])
head(pam.reverse.matches)